In [1]:
%pylab inline
from astropy.io import fits
from sklearn.ensemble import ExtraTreesRegressor
import pickle
In [2]:
# Validation sample: every model trained below is checked against this table.
gdr2val = fits.getdata(
    '../output/GDR2_207/GDR2_207_cleaned_0.0025sampling_validation.fits'
)
In [3]:
# Training sample.
gdr2 = fits.getdata("../output/GDR2_207/GDR2_207_cleaned_0.0025sampling.fits")

# Drop every row whose parallax_error is NaN; the two row counts printed
# below are the table length before and after the cut.
pe = gdr2.parallax_error
clean = np.logical_not(np.isnan(pe))
print(len(gdr2))
gdr2 = gdr2[clean]
print(len(gdr2))

# Column names available in the table.
print(gdr2.dtype.names)
In [4]:
def gmagerror(flux,fluxerror):
    """
    Calculates the symmetric magnitude error from a flux and its error.

    The flux is shifted up and down by ``fluxerror``, both shifted fluxes are
    converted to magnitudes, and half of the magnitude difference is returned.
    This is only a good approximation for small relative flux errors.

    Parameters
    ----------
    flux : scalar or array-like
        Mean flux.
    fluxerror : scalar or array-like
        Error on ``flux``; must broadcast against ``flux``.

    Returns
    -------
    numpy scalar or ndarray
        ``(mag(flux - fluxerror) - mag(flux + fluxerror)) / 2``. The result is
        non-finite wherever ``fluxerror >= flux`` because the logarithm of a
        non-positive number is taken.
    """
    # Coerce to arrays so that plain Python lists behave element-wise instead
    # of being concatenated by `+` (and failing on `-`).
    flux = np.asarray(flux)
    fluxerror = np.asarray(fluxerror)

    def flux2mag(f):
        # The additive constant cancels in the difference taken below, so the
        # returned error does not depend on its value.
        return(-2.5*np.log10(f)+25.688365)

    gp = flux2mag(flux + fluxerror)
    gm = flux2mag(flux - fluxerror)
    return(np.divide(gm-gp,2))
In [5]:
# Stage 1: train visibility_periods_used and phot_g_n_obs on sky position (l, b).
l = gdr2.l
b = gdr2.b
vp = gdr2.visibility_periods_used
gn = gdr2.phot_g_n_obs
X = np.vstack((l, b)).T
y = np.vstack((vp, gn)).T

# Only the non-default hyper-parameters are passed. The arguments dropped here
# were all spelled-out library defaults, and three of them (criterion='mse',
# max_features='auto', min_impurity_split=None) are rejected by current
# scikit-learn releases, so omitting them keeps the same model on old versions
# and lets the cell run on new ones.
# NOTE: no random_state is set, so the fitted trees differ from run to run.
model = ExtraTreesRegressor(n_estimators=10, min_samples_split=5,
                            bootstrap=True, oob_score=True, n_jobs=1)
model.fit(X, y)

# Evaluate on the validation sample. y_pred is read again by the next cell,
# which feeds the predicted vpu / n_obs into the second model.
Xval = np.vstack((gdr2val.l, gdr2val.b)).T
y_val = np.vstack((gdr2val.visibility_periods_used, gdr2val.phot_g_n_obs)).T
y_pred = model.predict(Xval)
print(model.feature_importances_)

# One real-vs-predicted scatter plot per target column.
names = ['visibility_periods_used','g_n_obs']
for i, item in enumerate(names):
    print(i, item)
    plt.plot(y_val[:, i], y_pred[:, i], ',', alpha=0.1)
    plt.xlabel(item + '_real')
    plt.ylabel(item + '_predicted')
    plt.yscale("log")
    plt.xscale("log")
    plt.show()
    plt.close()

# Persist the fitted model; the context manager guarantees the file is
# flushed and closed even if pickling fails.
filename = "lb2vpunobs_model"
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)
In [6]:
# Predictions of the previous (l, b) model on the validation sample; they
# replace the real vpu / n_obs columns in the validation features below.
vpu_val = y_pred[:,0]
nobs_val = y_pred[:,1]

# Stage 2: train parallax_error and G-magnitude error on g, bp-rp, vpu, gnobs.
g = gdr2.phot_g_mean_mag
bprp = gdr2.phot_bp_mean_mag - gdr2.phot_rp_mean_mag
pe = gdr2.parallax_error
vp = gdr2.visibility_periods_used
gn = gdr2.phot_g_n_obs
f = gdr2.phot_g_mean_flux
fe = gdr2.phot_g_mean_flux_error
ge = gmagerror(f, fe)
X = np.vstack((g, bprp, vp, gn)).T
y = np.vstack((pe, ge)).T

# Only the non-default hyper-parameters are passed. The arguments dropped here
# were all spelled-out library defaults, and three of them (criterion='mse',
# max_features='auto', min_impurity_split=None) are rejected by current
# scikit-learn releases, so omitting them keeps the same model on old versions
# and lets the cell run on new ones.
# NOTE: no random_state is set, so the fitted trees differ from run to run.
model = ExtraTreesRegressor(n_estimators=10, min_samples_split=5,
                            bootstrap=True, oob_score=True, n_jobs=1)
model.fit(X, y)

# Validation features: catalogue photometry plus the *predicted* vpu / n_obs.
Xval = np.vstack((gdr2val.phot_g_mean_mag,
                  gdr2val.phot_bp_mean_mag - gdr2val.phot_rp_mean_mag,
                  vpu_val, nobs_val)).T
y_val = np.vstack((
    gdr2val.parallax_error,
    gmagerror(gdr2val.phot_g_mean_flux, gdr2val.phot_g_mean_flux_error),
)).T
y_pred = model.predict(Xval)
print(model.feature_importances_)

# One real-vs-predicted scatter plot per target column.
names = ['parallax_error','g_mag_error']
for i, item in enumerate(names):
    print(i, item)
    plt.plot(y_val[:, i], y_pred[:, i], ',', alpha=0.1)
    plt.xlabel(item + '_real')
    plt.ylabel(item + '_predicted')
    plt.yscale("log")
    plt.xscale("log")
    plt.show()
    plt.close()

# Persist the fitted model; the context manager guarantees the file is
# flushed and closed even if pickling fails.
filename = "gbprpvpunobs2pege_model"
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)
In [7]:
# Stage 3: train radial_velocity_error on g, bp-rp, teff.
# NOTE: gdr2 and gdr2val are overwritten here with the subset of rows that
# have both a radial velocity and a teff_val, so every later cell that reads
# them sees only that subset.
cut = np.isnan(gdr2.radial_velocity)
print(len(gdr2))
gdr2 = gdr2[~cut]
print(len(gdr2))
cut = np.isnan(gdr2.teff_val)
print(len(gdr2))
gdr2 = gdr2[~cut]
print(len(gdr2))

g = gdr2.phot_g_mean_mag
bprp = gdr2.phot_bp_mean_mag - gdr2.phot_rp_mean_mag
te = gdr2.teff_val
rve = gdr2.radial_velocity_error
X = np.vstack((g, bprp, te)).T
y = rve

# Only the non-default hyper-parameters are passed. The arguments dropped here
# were all spelled-out library defaults, and three of them (criterion='mse',
# max_features='auto', min_impurity_split=None) are rejected by current
# scikit-learn releases, so omitting them keeps the same model on old versions
# and lets the cell run on new ones.
# NOTE: no random_state is set, so the fitted trees differ from run to run.
model = ExtraTreesRegressor(n_estimators=10, min_samples_split=5,
                            bootstrap=True, oob_score=True, n_jobs=1)
model.fit(X, y)

# Apply the same two NaN cuts to the validation sample.
cut = np.isnan(gdr2val.radial_velocity)
print(len(gdr2val))
gdr2val = gdr2val[~cut]
print(len(gdr2val))
cut = np.isnan(gdr2val.teff_val)
print(len(gdr2val))
gdr2val = gdr2val[~cut]
print(len(gdr2val))

Xval = np.vstack((gdr2val.phot_g_mean_mag,
                  gdr2val.phot_bp_mean_mag - gdr2val.phot_rp_mean_mag,
                  gdr2val.teff_val)).T
rve = gdr2val.radial_velocity_error
y_val = rve
y_pred = model.predict(Xval)
print(model.feature_importances_)

# Real-vs-predicted scatter plot for the single target.
plt.plot(y_val, y_pred, ',', alpha=0.1)
plt.xlabel('rve_real')
plt.ylabel('rve_predicted')
plt.yscale("log")
plt.xscale("log")
plt.show()
plt.close()

# Persist the fitted model; the context manager guarantees the file is
# flushed and closed even if pickling fails.
filename = "gbprpteff2rvse_model"
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)
In [8]:
# BP band: catalogue magnitude, mean flux and flux error.
g = gdr2.phot_bp_mean_mag
f = gdr2.phot_bp_mean_flux
fe = gdr2.phot_bp_mean_flux_error

# Print -2.5*log10(flux), the catalogue magnitude, and their difference.
t = -2.5 * np.log10(f)
print(t)
print(g)
print(t - g)


def flux2mag(flux):
    """Turn a flux into a magnitude: -2.5*log10(flux) plus the constant 25.351388."""
    return -2.5 * np.log10(flux) + 25.351388


# Magnitudes of the flux shifted up / down by its error.
gp = flux2mag(f + fe)
gm = flux2mag(f - fe)
d1 = gp - g
d2 = g - gm

# asymmetry in magnitude error: the two offsets against each other,
# with a straight reference line where they are equal
plt.plot(d1, d2, '.', alpha=0.1)
plt.plot([-0.14, 0], [-0.14, 0])
plt.xlim((-0.03, 0.0))
plt.ylim((-0.03, 0.0))

# Symmetric magnitude error: half the spread between the two shifted magnitudes.
bpme = (gm - gp) / 2
In [9]:
# RP band: catalogue magnitude, mean flux and flux error.
g = gdr2.phot_rp_mean_mag
f = gdr2.phot_rp_mean_flux
fe = gdr2.phot_rp_mean_flux_error

# Print -2.5*log10(flux), the catalogue magnitude, and their difference.
t = -2.5 * np.log10(f)
print(t)
print(g)
print(t - g)


def flux2mag(flux):
    """Turn a flux into a magnitude: -2.5*log10(flux) plus the constant 24.7619."""
    return -2.5 * np.log10(flux) + 24.7619


# Magnitudes of the flux shifted up / down by its error.
gp = flux2mag(f + fe)
gm = flux2mag(f - fe)
d1 = gp - g
d2 = g - gm

# asymmetry in magnitude error: the two offsets against each other,
# with a straight reference line where they are equal
plt.plot(d1, d2, '.', alpha=0.1)
plt.plot([-0.14, 0], [-0.14, 0])
plt.xlim((-0.03, 0.0))
plt.ylim((-0.03, 0.0))

# Symmetric magnitude error: half the spread between the two shifted magnitudes.
rpme = (gm - gp) / 2
In [10]:
# RP symmetric magnitude error (rpme, from the RP cell) against RP magnitude.
# The magnitude column is read straight from gdr2 instead of the global `g`,
# which is reassigned to G, BP and RP magnitudes in different cells.
plt.plot(rpme, gdr2.phot_rp_mean_mag, ',', alpha=0.1)
plt.xlabel('rp_mag_error')
plt.ylabel('phot_rp_mean_mag')
plt.yscale('log')
plt.xscale('log')
In [11]:
# Compare the number of observations interpolated from the tabulated
# N_obs(sin beta) relation with the catalogue phot_g_n_obs.
nobs = np.genfromtxt('errors/nobs.txt', names=True)
scaling_factor_dr2 = 0.37

# np.sin expects radians; the Gaia data model lists ecl_lat in degrees, so
# convert first. NOTE(review): confirm the cleaned FITS table keeps ecl_lat
# in degrees.
sin_beta = np.abs(np.sin(np.radians(gdr2.ecl_lat)))
number_obs = np.round(
    scaling_factor_dr2 * np.interp(sin_beta, nobs['sinbeta'], nobs['N_obs'])
)

plt.plot(number_obs, gdr2.phot_g_n_obs, '.', alpha=0.01)
plt.plot([6, 22], [6, 22])  # one-to-one reference line
# x is the interpolated (predicted) value, y the catalogue (real) value.
plt.xlabel('g_n_obs_predicted')
plt.ylabel('g_n_obs_real')
plt.yscale("log")
plt.xscale("log")
plt.show()
plt.close()